package com.novel.crawl.bxwx9.spider.pipeline;

import com.novel.crawl.common.entity.Book;
import com.novel.crawl.common.service.BookService;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.ResultItems;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.pipeline.Pipeline;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * WebMagic pipeline that stores crawled book data through {@link BookService}.
 *
 * <p>For every processed item a {@link Book} is assembled from the extracted
 * fields. If no book with the same (trimmed) URL is known to the service, the
 * book is added; otherwise the existing record is updated with the freshly
 * crawled values.
 *
 * @author zhilong.deng@hand-china.com
 * @date 2018/11/1
 * @version 1.0
 */
public class BookPipeline implements Pipeline {

    private static final Logger LOG = LoggerFactory.getLogger(BookPipeline.class);

    /** Captures everything that follows a "{@code  >  >}" sequence in the raw name field. */
    private static final Pattern BOOK_NAME_PATTERN = Pattern.compile("\\s+>\\s+>(.+)");
    /** Captures the text enclosed between the two site markers in the raw description field. */
    private static final Pattern BOOK_DESC_PATTERN = Pattern.compile("bxwx9.org(.*)wWw.bxwx9.org");

    private final BookService bookService;

    /**
     * @param bookService service used to look up, add and modify books
     */
    public BookPipeline(BookService bookService) {
        this.bookService = bookService;
    }

    /**
     * Converts the extracted fields into a {@link Book} and inserts or updates it.
     *
     * <p>Items without a usable {@code url} field are skipped with a warning,
     * because the URL is the key used to detect already stored books. Any other
     * missing field is stored as {@code null}.
     *
     * @param resultItems extracted fields; read keys are {@code name}, {@code type},
     *                    {@code url}, {@code image}, {@code author}, {@code desc}
     *                    and {@code status}
     * @param task        the running task (not used by this pipeline)
     */
    @Override
    public void process(ResultItems resultItems, Task task) {
        // Trim once up front so that lookup and persisted value use the same key.
        String url = StringUtils.trim(fieldAsString(resultItems, "url"));
        if (StringUtils.isBlank(url)) {
            LOG.warn("skip book without url, resultItems: {}", resultItems);
            return;
        }

        String name = extractGroup(BOOK_NAME_PATTERN, fieldAsString(resultItems, "name"));
        String desc = extractGroup(BOOK_DESC_PATTERN, fieldAsString(resultItems, "desc"));

        Book book = new Book();
        book.setAuthor(StringUtils.trim(fieldAsString(resultItems, "author")));
        book.setName(StringUtils.trim(name));
        book.setUrl(url);
        book.setImage(StringUtils.trim(fieldAsString(resultItems, "image")));
        book.setType(StringUtils.trim(fieldAsString(resultItems, "type")));
        book.setSource(Book.SOURCE_TYPE_BXWX);
        book.setDescription(StringUtils.trim(desc));
        book.setStatus(StringUtils.trim(fieldAsString(resultItems, "status")));

        Book persistBook = bookService.findByUrl(url);
        if (persistBook == null) {
            LOG.debug("new book, bookUrl: {}", url);
            bookService.add(book);
            return;
        }

        LOG.debug("book exist, bookId: {}", persistBook.getId());
        // The source of an existing record is intentionally left untouched.
        persistBook.setAuthor(book.getAuthor());
        persistBook.setName(book.getName());
        persistBook.setUrl(book.getUrl());
        persistBook.setImage(book.getImage());
        persistBook.setType(book.getType());
        persistBook.setDescription(book.getDescription());
        persistBook.setStatus(book.getStatus());
        bookService.modify(persistBook);
    }

    /** Returns the string form of the field stored under {@code key}, or {@code null} if it is absent. */
    private static String fieldAsString(ResultItems resultItems, String key) {
        Object value = resultItems.get(key);
        return value == null ? null : value.toString();
    }

    /**
     * Returns capture group 1 of the first match of {@code pattern} in {@code text};
     * returns {@code text} unchanged when it is {@code null} or does not match.
     */
    private static String extractGroup(Pattern pattern, String text) {
        if (text == null) {
            return null;
        }
        Matcher matcher = pattern.matcher(text);
        return matcher.find() ? matcher.group(1) : text;
    }
}
